use anyhow::Result;
use regex::Regex;
use std::path::PathBuf;
use tensorzero_core::inference::types::{
    ContentBlockChatOutput, StoredInput, StoredInputMessage, StoredInputMessageContent, System,
    Text,
};
/*
This file handles the outputs of inferences from Cursor. We handle three cases:
- Cursor Ask (from the sidebar, where you ask a question; the response can also include code completions).
- Cursor Edit (Cmd-K on a selection, directly from the code).
- Cursor Insert (Cmd-K directly from the code, with no text selected).
*/

/// A single code block extracted from a Cursor inference output.
///
/// Derives `Clone`, `PartialEq` and `Eq` in addition to `Debug` so parsed blocks
/// can be copied and compared directly (e.g. in assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CursorCodeBlock {
    /// For Cursor Ask output this is the language tag from the code fence;
    /// for Cursor Edit / Insert output it is the extension of `path`.
    pub language_extension: String,
    /// Path of the file the block refers to, relative to the Cursor workspace.
    pub path: PathBuf,
    /// The text of the code block itself.
    pub content: String,
}

/// Dispatch a Cursor inference output to the matching parser.
///
/// The parser is chosen by looking for known phrases in the system message.
/// This could be avoided if we could hardcode different TensorZero functions for
/// different tasks; as it stands, the routing is brittle to Cursor changing its
/// system prompt.
///
/// # Errors
///
/// Returns an error when the input has no text system message, when the output
/// is not exactly one text block, when the system message matches none of the
/// known phrases, or when the selected parser fails.
pub fn parse_cursor_output(
    input: &StoredInput,
    output: &Vec<ContentBlockChatOutput>,
) -> Result<Vec<CursorCodeBlock>> {
    let system = match &input.system {
        Some(System::Text(text)) => text,
        _ => return Err(anyhow::anyhow!("No system message found")),
    };

    // Only outputs made of exactly one text block are supported.
    let [ContentBlockChatOutput::Text(only_block)] = output.as_slice() else {
        return Err(anyhow::anyhow!("Output is not a single text block"));
    };
    let output_text = &only_block.text;

    if system.contains("rewrite a piece of code") {
        parse_cursor_edit_output(&input.messages, output_text)
    } else if system.contains("insert a piece of code") {
        parse_cursor_insert_output(&input.messages, output_text)
    } else if system.contains("pair programming with a USER to solve their coding task") {
        parse_cursor_ask_output(output_text)
    } else {
        // TODO: handle the terminal-command prompt, which currently ends up here:
        //
        // System message doesn't fit our expected format: You are an intelligent programmer.
        // You are helping a colleague write a terminal command. In your response, only output
        // the terminal command to write, surrounded by backticks. If you want to run a script,
        // make sure to use the correct path relative to the active terminal's working
        // directory. Immediately start your response with <cmd>.
        //
        // Only output a single command, but make it however complicated it needs to be. If you
        // want it to be multiline, remember to appropriately use "\" or "`" to delimit the
        // lines.
        //
        // Produce nothing else other than the command to run, on a single line. Surround the
        // insertion with the tags <cmd> and</cmd>. For example, if you want to run ls -la, you
        // would write <cmd>ls -la</cmd>.
        Err(anyhow::anyhow!(
            "System message doesn't fit our expected format: {system}"
        ))
    }
}

/// Parse Cursor Ask output.
///
/// According to their system prompt, the output format for code blocks in Cursor is:
/// ```language:path/to/file
/// // ... existing code ...
/// {{ edit_1 }}
/// // ... existing code ...
/// {{ edit_2 }}
/// // ... existing code ...
/// ```
/// which might be inside of a larger string that contains other text as well.
/// In this file, we take a particular Cursor output (as a &str)
/// and return a Vec<CursorCodeBlock>.
///
/// Note: all paths here and in the rest of this file are relative to the Cursor workspace root.
/// later on, we'll need to normalize these paths to the git root.
fn parse_cursor_ask_output(output_text: &str) -> Result<Vec<CursorCodeBlock>> {
    //  Find all ```lang:rel/path/to/file\n...``` blocks in the output.
    //  We use (?s) so `.` matches newlines, and make the match non‑greedy.
    let block_re =
        Regex::new(r"(?s)```(?P<lang>[^:\n]+):(?P<file>[^\n]+)\r?\n(?P<content>.*?)```")?;

    let mut blocks = Vec::new();
    for cap in block_re.captures_iter(output_text) {
        let language_extension = cap["lang"].to_string();
        let file_ref = &cap["file"];
        let content = cap["content"].to_string();

        blocks.push(CursorCodeBlock {
            language_extension,
            path: file_ref.to_string().into(),
            content,
        });
    }

    Ok(blocks)
}

/// Parse Cursor Edit output.
///
/// To get the file name: The second user message seems to consistently contain
/// the header `## Selection to Rewrite` and then a code block on the next line.
///
/// For example:
/// ````text
/// ...
/// ## Selection to Rewrite
/// ```src/cursor.rs
///     // Start of Selection
///     let system = match system {
///         Some(Value::String(s)) => s,
///         _ => return Err(anyhow::any
/// ...
/// ````
///
/// Therefore, we use a regex to extract the file name from the second user message.
///
/// To get the code generated:
/// The generated output seems to always look like this:
/// ```text
///     // Start of Selection
///     let Some(Value::String(system)) = system else {
///         return Err(anyhow::anyhow!("No system message found"));
///     };
///     // End of Selection
/// ```
/// where the code block is the code generated by Cursor.
/// Therefore, we can simply extract the code block from the output and remove the comments.
/// This is equivalent to taking the whole string except for the first two lines and the last
/// two lines.
///
/// # Errors
///
/// Returns an error when `messages` does not contain exactly two messages, when the
/// second message is not a single text block, when no file reference follows the
/// `## Selection to Rewrite` header, when `output_text` has fewer than four lines,
/// or when the referenced file has no extension.
fn parse_cursor_edit_output(
    messages: &[StoredInputMessage],
    output_text: &str,
) -> Result<Vec<CursorCodeBlock>> {
    // There should be exactly 2 messages in the input; the second carries the selection.
    let [_, selection_message] = messages else {
        return Err(anyhow::anyhow!(
            "Expected 2 messages in input, got {}",
            messages.len()
        ));
    };
    let [StoredInputMessageContent::Text(Text {
        text: selection_text,
    })] = selection_message.content.as_slice()
    else {
        return Err(anyhow::anyhow!("Expected text in second user message"));
    };

    // 1. Extract the workspace-relative path from the second user message.
    //    `[^\r\n]+` (rather than `[^\n]+`) keeps a CRLF line ending from leaving a
    //    trailing `\r` on the captured path.
    let file_re = Regex::new(r"## Selection to Rewrite\r?\n```(?P<file>[^\r\n]+)")?;
    let file_ref = file_re
        .captures(selection_text)
        .and_then(|cap| cap.name("file"))
        .map(|m| PathBuf::from(m.as_str()))
        .ok_or_else(|| anyhow::anyhow!("Couldn't find file reference in edit selection"))?;

    // 2. Extract the generated code from the output by dropping the first two and the
    //    last two lines.
    let lines: Vec<&str> = output_text.lines().collect();
    if lines.len() < 4 {
        // With fewer than 4 lines there is nothing left between the wrapper lines, so the
        // expected format is not met; report that instead of returning an empty block.
        return Err(anyhow::anyhow!(
            "Output text has fewer than 4 lines ({} lines), cannot remove first 2 and last 2 lines. Output: '{}'",
            lines.len(),
            output_text
        ));
    }
    let content = lines[2..lines.len() - 2].join("\n");

    // 3. Derive the language from the extension of the referenced file.
    let language_extension = file_ref
        .extension()
        .map(|ext| ext.to_string_lossy().into_owned())
        .ok_or_else(|| anyhow::anyhow!("No extension found in file reference"))?;

    // An edit always yields exactly one block.
    Ok(vec![CursorCodeBlock {
        language_extension,
        path: file_ref,
        content,
    }])
}

/// Parse Cursor Insert output.
///
/// This output appears if the user uses CMD-K but doesn't select any text.
///
/// To get the current file name: there is a line in the first user message that starts
/// with "This is my current file", and the next line opens a code fence whose header is
/// the file path:
/// ````text
/// This is my current file...
/// ```path/to/file.ext
/// ````
/// We grab the path via a regex.
///
/// The generated code is taken from `output_text` by dropping its first two and last
/// two lines.
///
/// # Errors
///
/// Returns an error when `messages` is empty, when the first message is not a single
/// text block, when the file reference cannot be found, when the referenced file has no
/// extension, or when `output_text` has fewer than four lines.
fn parse_cursor_insert_output(
    messages: &[StoredInputMessage],
    output_text: &str,
) -> Result<Vec<CursorCodeBlock>> {
    // The workspace-relative path lives in the first message.
    let Some(first_message) = messages.first() else {
        return Err(anyhow::anyhow!(
            "Expected at least one message for insert mode"
        ));
    };
    let [StoredInputMessageContent::Text(Text {
        text: first_message_text,
    })] = first_message.content.as_slice()
    else {
        return Err(anyhow::anyhow!(
            "Expected the first user message to contain a plain-text block"
        ));
    };

    // Only match a ```path/to/file.ext``` fence if it's right after
    // a line that begins with "This is my current file".
    let file_re = Regex::new(r"(?m)^This is my current file[^\r\n]*\r?\n```(?P<file>[^\r\n]+)")?;
    let file_ref = file_re
        .captures(first_message_text)
        .and_then(|cap| cap.name("file"))
        .map(|m| PathBuf::from(m.as_str()))
        .ok_or_else(|| {
            anyhow::anyhow!(
                "Couldn't find a line starting with \"This is my current file\" \
                 followed by a ```file/path``` fence"
            )
        })?;
    let language_extension = file_ref
        .extension()
        .map(|ext| ext.to_string_lossy().into_owned())
        .ok_or_else(|| anyhow::anyhow!("No extension found in file reference"))?;

    // Grab the code from the output text by skipping its first 2 and last 2 lines.
    let lines: Vec<&str> = output_text.lines().collect();
    if lines.len() < 4 {
        // Without this guard the slice below would panic (usize underflow or an inverted
        // range) on short output; report the malformed output as an error instead, the
        // same way the edit parser does.
        return Err(anyhow::anyhow!(
            "Output text has fewer than 4 lines ({} lines), cannot remove first 2 and last 2 lines. Output: '{}'",
            lines.len(),
            output_text
        ));
    }
    let content = lines[2..lines.len() - 2].join("\n");

    // An insert always yields exactly one block.
    Ok(vec![CursorCodeBlock {
        language_extension,
        path: file_ref,
        content,
    }])
}
